* ==========================================================
* ----------- ANES 2008
* ==========================================================

use "${dir_raw}/anes2008_2009panel_dataset.dta", clear

generate id = caseid

* ------- respondent-level variables
* demographics: raw items are copied only where the source code is positive,
* so non-positive codes end up as missing in the derived variables
generate r_sex    = der01
generate r_female = (der01 == 2) if !missing(der01)

generate r_age  = der02
generate r_race = der23 if der23 > 0    // der23 is used here; der04 is the alternative item
* one 0/1 flag per race code 1, 2, 3 (missing when r_race is missing)
local race_code = 0
foreach grp in white black others {
	local ++race_code
	generate r_`grp' = (r_race == `race_code') if !missing(r_race)
}

generate r_degree = der05 if der05 > 0
generate r_educ   = cpq15 if cpq15 > 0
* collapse the 14 cpq15 categories onto a numeric scale
* (presumably approximate years of schooling -- confirm against the codebook)
recode r_educ (1=0) (2=4) (3=6) (4=8) (5=9) (6=10) (7=11) (8 9=12) ///
	(10=13) (11=14) (12=16) (13=18) (14=20)

generate r_marstat = der24 if der24 > 0
* 0/1 flag for marital status code 1
generate r_married = (r_marstat == 1) if !missing(r_marstat)

* Employment status: cpq17 where it holds a positive code, otherwise w11zg1 as a fallback.
* Non-positive codes in both items are left as missing.
gen r_wrkstat = cpq17 if cpq17 > 0 
	replace r_wrkstat = w11zg1 if missing(r_wrkstat) & w11zg1 > 0 

*        1. Working - as a paid employee |      1,511       35.64       67.69
*             2. Working - self-employed |        335        7.90       75.59
*3. Not working - on temporary layoff fr |         16        0.38       75.97
*      4. Not working - looking for work |         94        2.22       78.18
*               5. Not working - retired |        564       13.30       91.49
*              6. Not working - disabled |        141        3.33       94.81
*                 7. Not working - other |        220        5.19      100.00
* 0/1 indicators built from r_wrkstat.
* The catch-all rule is (nonmissing=0) rather than (else=0): in recode, else also matches
* missing values, which would code respondents without a work-status answer as 0 on every
* indicator. With nonmissing, those respondents stay missing in the generated variables.
	recode r_wrkstat (1 2 =1) (nonmissing=0), gen(r_working)
	recode r_wrkstat (3 4=1) (nonmissing=0), gen(r_unemployed)
	recode r_wrkstat (5=1) (nonmissing=0), gen(r_retired)
	recode r_wrkstat (6 7 8=1) (nonmissing=0), gen(r_otherworks)
 
gen r_adults = der18 // der18 (eligible person)
gen r_watch_internet = w9f3 if w9f3 > 0 

* 0/1 flag for der22 code 5 (labelled "No Religion" in the der22 tabulation further down)
gen r_no_religion = der22 == 5 if der22 > 0 

* Attendance is recorded in three alternative items; each is put on a per-year basis
* (the _2 item is multiplied by 12 and the _1 item by 52). Negative codes stay missing.
gen r_attendance_y = w9zg1a_3 if w9zg1a_3 >= 0 
gen r_attendance_m = w9zg1a_2 *12 if w9zg1a_2 >= 0 
gen r_attendance_w = w9zg1a_1 * 52 if w9zg1a_1 >=0

* Combine the three: start from the yearly figure and fall back to the monthly and then the
* weekly figure only where nothing has been filled in yet. The fallback must test missing();
* testing ~missing() overwrites valid values and never fills the gaps.
gen r_attendance = r_attendance_y 
	replace r_attendance = r_attendance_m if missing(r_attendance)
	replace r_attendance = r_attendance_w if missing(r_attendance)

* political information
* 7-point party id: der08w9 shifted up by one; negative codes stay missing
gen r_partyid7 = der08w9 + 1 if der08w9 >=0

* 4-category party id: w9l1, overwritten by w9l3 wherever w9l3 holds a positive code.
* Stata ranks missing above every number, so "w9l3 > 0" on its own is also true when
* w9l3 is missing and would blank out a valid w9l1 answer; ~missing() prevents that.
* NOTE(review): the alter items w9zd12 and w9zd13 are combined with codes 1 and 2 swapped
* (see the alter party id loop); confirm in the codebook whether w9l3 needs the same swap.
gen r_partyid4 = w9l1 if w9l1 > 0 
replace r_partyid4 = w9l3 if w9l3 > 0 & ~missing(w9l3)


* ideology: der09w6 where available, der09w10 as the fallback
gen r_ideo_w6 = der09w6 if der09w6 > 0 
gen r_ideo_w10 = der09w10 if der09w10 > 0 

gen r_ideo = r_ideo_w6
replace r_ideo = r_ideo_w10 if missing(r_ideo)

* intensity measures: absolute distance from the value 4 on each scale
gen r_party_int = abs(r_partyid7-4)
gen r_ideo_int = abs(r_ideo-4)

* political interest: w9h1 reversed as 5 minus the source code (code 1 becomes 4, code 5 becomes 0)
generate r_pol_interest = 5 - w9h1 if w9h1 > 0
*         1. Extremely interested |        627       14.79       50.19
*              2. Very interested |        995       23.47       73.66
*        3. Moderately interested |        799       18.84       92.50
*          4. Slightly interested |        266        6.27       98.77
*        5. Not interested at all |         52        1.23      100.00

generate r_talk_freq = w9h2 if w9h2 >= 0

* talk frequency across time within individual:
* one variable per source item, suffixed with the number that follows the "w" in the item name;
* negative codes stay missing
local talk_suffix 1 2 9 10 11 19
local talk_source w1k2 w2g2 w9h2 w10h2 w11h2 w19h2
local n_talk : word count `talk_suffix'
forvalues k = 1/`n_talk' {
	local sfx : word `k' of `talk_suffix'
	local src : word `k' of `talk_source'
	generate r_talk_freq`sfx' = `src' if `src' >= 0
}

* state identifiers (the commented-out line is an earlier variant based on w9zw1)
*gen state= w9zw1 if w9zw1 > 0
generate state_h  = w9xhomest    // use this
generate state_v  = w9xregst
generate state_v2 = w9regst      // updated voter registration
generate state    = w9xhomest


generate region = wgtregion if wgtregion > 0

* interview context related
generate i_recruit = (rflagin == 1)

* 0/1 access flags: 1 when the source item equals 1, missing when the source code is not positive
local ctx_new i_access i_access_home i_access_work
local ctx_src rrhasaccess rwebaccesshome rwebaccesswkscl
forvalues k = 1/3 {
	local new : word `k' of `ctx_new'
	local src : word `k' of `ctx_src'
	generate `new' = (`src' == 1) if `src' > 0
}

generate i_phonelines     = rphonelines if rphonelines >= 0
generate i_size_household = rhhsize if rhhsize > 0

* survey weights
generate sampcode = stratum
generate wtall    = wgtcs09

* also think of other "panel" weights + baseline weights / post-stratification weights
generate wave9 = (w9flag == 1)

* interview date: w9date is read as a string with the month in characters 5-6 and the day
* in characters 7-8; the placeholder ". . ." is blanked first. The year is fixed at 2008.
generate year = 2008
generate str_dateintv = w9date
replace  str_dateintv = "" if str_dateintv == ". . ."

generate month = substr(str_dateintv, 5, 2)
generate day   = substr(str_dateintv, 7, 2)
generate yy    = year if str_dateintv != ""

* day/month/year string, then a Stata daily date displayed with the %td format
generate svydate  = day + "/" + month + "/" + string(yy) if str_dateintv != ""
generate svydate2 = date(svydate, "DMY")
format svydate2 %td

* ---------- network level information
* n_base: 0/1 flag for w9zd1 equal to 1 (missing when w9zd1 is not positive)
generate n_base = (w9zd1 == 1) if w9zd1 > 0
* n_size: der17, forced to 0 where n_base is 0; n_size3 is the same count capped at 3
generate n_size = der17 if der17 >= 0
replace  n_size = 0 if n_base == 0
recode n_size (3/max=3), generate(n_size3)

* relative?  one 0/1 flag per alter slot, from w9zd6_1 to w9zd6_3
forvalues k = 1/3 {
	generate a_rel`k' = (w9zd6_`k' == 1) if w9zd6_`k' > 0
}

* count of relatives; rowtotal returns 0 when every flag is missing, so the count is
* blanked where the network size itself is unknown
egen n_relative = rowtotal(a_rel*)
replace n_relative = . if missing(n_size)

* share of relatives among the alters with a non-missing flag
egen p_relative = rowmean(a_rel*)

* relationship with alters 
* close : w9zd4_1 
* network closeness : w9zd8a(1-2) / w9zd8b(1-3) / w9zd8c(2-3)
* opinion diff : w9zd9_1 
* live together : w9zd5_1 
* relative : w9zd6_1 

* ----------------- alter characteristics 
* gender : w9zd7_1 
* likely vote : w9zd10_1 
* interest : w9zd17_1
* same denomination : w9zd19_1 
* age : w9zd20_1 
* education : w9zd23_1 
* hispanic : w9zd21_1 
* each race/ethnicity : w9zd22_1_1 - w9zd22_1_16 / w9zd22_2_1 - w9zd22_2_16 ...
* religion : w9zd18_1 
* democrat : w9zd12_1 / w9zd14_1 
* republican : w9zd13_1 / w9zd15_1
* moderate : w9zd16_1 

* Build the alter-level variables for each of the three alter slots (item suffix _1 to _3).
* Every item is copied only where its source code is positive; other codes stay missing.
forvalues i = 1/3{
	* closeness, disagreement, vote likelihood and interest are reversed as 5 minus the source code
	gen a`i'_closeness = 5-w9zd4_`i' if w9zd4_`i' >0
	gen a`i'_live_together = w9zd5_`i' ==1 if w9zd5_`i' > 0
	gen a`i'_relative = w9zd6_`i' ==1 if w9zd6_`i' > 0
	gen a`i'_male = w9zd7_`i' == 1 if w9zd7_`i' > 0 
	gen a`i'_disagree = 5-w9zd9_`i' if w9zd9_`i' > 0 
	gen a`i'_vote_like = 5-w9zd10_`i' if w9zd10_`i' > 0 
	* 0/1 flag for the top value (4) of the reversed vote-likelihood scale
	gen a`i'_vote_like2 = a`i'_vote_like == 4 if ~missing(a`i'_vote_like)
	gen a`i'_interest = 5-w9zd17_`i' if w9zd17_`i' > 0 
	gen a`i'_religion = w9zd18_`i' if w9zd18_`i' > 0 
	* regroup the alter religion codes
	* NOTE(review): presumably meant to line up with the der22 categories tabulated further down; confirm
	recode a`i'_religion (1=1) (2=3) (3 4 5 6=4) (7=5)
	gen a`i'_same_church = w9zd19_`i' == 1 if w9zd19_`i' > 0 
	gen a`i'_age = w9zd20_`i' if w9zd20_`i'  > 0 
	gen a`i'_hispanic = w9zd21_`i' == 1 if w9zd21_`i' > 0 
	* NOTE(review): the filter below excludes a source value of 0; if 0 means "not selected"
	* for these race items, the filter should be >= 0 -- confirm against the codebook
	gen a`i'_white = w9zd22_`i'_1 if w9zd22_`i'_1 > 0 
	gen a`i'_black = w9zd22_`i'_2 if w9zd22_`i'_2 > 0 
	* filter on this alter's own education item; testing w9zd23_1 for every slot would blank
	* alters 2 and 3 whenever alter 1 has no valid education code
	gen a`i'_educ = w9zd23_`i' if w9zd23_`i' > 0 
	* same category mapping as the respondent variable r_educ (code 10 maps to 13, not 14)
	recode a`i'_educ (min/0 =.) (1=0) (2=4) (3=6) (4=8) (5=9) (6=10) (7=11) (8=12) (9=12) (10=13) (11=14) (12=16) (13=18) (14=20)
}



* R's religion 
*             der22. DERIVED. Religion |      Freq.     Percent        Cum.
*---------------------------------------+-----------------------------------
*      -6. Not asked, unit non-response |      1,246       29.39       29.39
*             -5. Not asked, terminated |          5        0.12       29.50
*-2. Missing, miscellaneous nonresponse |        285        6.72       36.23
*                         1. Protestant |      1,284       30.28       66.51
*                           2. Catholic |        634       14.95       81.46
*                             3. Jewish |         92        2.17       83.63
*                              4. Other |        338        7.97       91.60
*                        5. No Religion |        356        8.40      100.00
*---------------------------------------+-----------------------------------
*                                 Total |      4,240      100.00

* Alter's party id (4 categories, plus 9 for named alters with no party answer)
forvalues k = 1/3 {
	local alt w9zd13_`k'
	* start from w9zd12 where it holds a positive code
	generate a`k'_partyid4 = w9zd12_`k' if w9zd12_`k' > 0
	* w9zd13 overrides it, with codes 1 and 2 swapped and codes 3 and 4 kept as they are
	replace a`k'_partyid4 = cond(`alt' == 1, 2, cond(`alt' == 2, 1, `alt')) if inlist(`alt', 1, 2, 3, 4)
	* alter slot is filled (n_size above k - 1) but still no party id: code 9
	replace a`k'_partyid4 = 9 if missing(a`k'_partyid4) & !missing(n_size) & n_size > `k' - 1
}

* party ID uncertainty: 0/1 flag for code 9, missing when the alter has no party id at all
forvalues k = 1/3 {
	generate a`k'_partyid_dk = (a`k'_partyid4 == 9) if !missing(a`k'_partyid4)
}

* number and share of alters flagged as uncertain
egen n_partyid_dk = rowtotal(a?_partyid_dk)
egen p_partyid_dk = rowmean(a?_partyid_dk)

* strength in party id
* pa1 comes from w9zd14, overwritten by w9zd15 wherever w9zd15 holds a positive code.
* Stata ranks missing above every number, so "w9zd15 > 0" alone is also true for a missing
* w9zd15 and would blank out a valid w9zd14 answer; ~missing() prevents that.
* Code 9 marks named alters whose party id is 1 or 2 but who have no strength answer.
forvalues i = 1/3{
	gen pa1_`i' = w9zd14_`i' if w9zd14_`i' > 0
	replace pa1_`i' = w9zd15_`i' if w9zd15_`i' > 0 & ~missing(w9zd15_`i')
	replace pa1_`i' = 9 if (n_size > `i'-1 & ~missing(n_size)) & missing(pa1_`i') & (a`i'_partyid4 ==1 |a`i'_partyid4 ==2)
}

* closeness in independent / something else in alter
* pa2 comes from w9zd16; code 9 marks named alters whose party id is 3 or 4 but who have no answer
forvalues i = 1/3{
	gen pa2_`i' = w9zd16_`i' if w9zd16_`i' > 0 
	replace pa2_`i' = 9 if (n_size > `i'-1 & ~missing(n_size)) & missing(pa2_`i') & (a`i'_partyid4 ==3 |a`i'_partyid4 ==4)
}

* party - 7 scale
* Built from the 4-category party id plus the strength (pa1) and closeness (pa2) items.
* Any other combination, including the code 9 placeholders, leaves the scale missing.
forvalues k = 1/3 {
	local pid      a`k'_partyid4
	local strength pa1_`k'
	local lean     pa2_`k'

	generate a`k'_partyid7 = .

	* party id 1: strength 1 and 2 map to scale points 1 and 2
	replace a`k'_partyid7 = `strength' if `pid' == 1 & inlist(`strength', 1, 2)
	* party id 2: strength 2 and 1 map to scale points 6 and 7
	replace a`k'_partyid7 = cond(`strength' == 2, 6, 7) if `pid' == 2 & inlist(`strength', 1, 2)
	* party id 3, and the something else category 4, share one mapping:
	* closeness 2, 3, 1 map to scale points 3, 4, 5
	replace a`k'_partyid7 = cond(`lean' == 2, 3, cond(`lean' == 3, 4, 5)) if inlist(`pid', 3, 4) & inlist(`lean', 1, 2, 3)
}

* party - 3scale
forvalues k = 1/3 {
	local alt w9zd13_`k'
	* start from w9zd12, folding its code 4 into 3
	generate a`k'_partyid3 = w9zd12_`k' if w9zd12_`k' > 0
	replace a`k'_partyid3 = 3 if w9zd12_`k' == 4
	* w9zd13 overrides it: codes 1 and 2 are swapped, codes 3 and 4 both become 3
	replace a`k'_partyid3 = cond(`alt' == 1, 2, cond(`alt' == 2, 1, 3)) if inlist(`alt', 1, 2, 3, 4)
	* (disabled) replace a_partyid# = 9 if (n_size > k-1 & ~missing(n_size)) & missing(a_partyid4)
	* exchange codes 2 and 3 so the ordering matches r_partyid3 below
	recode a`k'_partyid3 (2=3) (3=2)
}

* respondent 3-point scale: the 7-point scale collapsed as 1-2, 3-5, 6-7
generate r_partyid3 = r_partyid7
recode r_partyid3 (1 2=1) (3 4 5=2) (6 7=3)

* 0/1 flag: respondent and alter share the same 3-point party id (missing if either is missing)
forvalues k = 1/3 {
	generate a`k'_same_partyid3 = (r_partyid3 == a`k'_partyid3) if !missing(r_partyid3, a`k'_partyid3)
}

* 0/1 flag: respondent and alter share the same 4-category party id.
* The guard tests the two variables that are actually compared. Guarding on the 3-point
* variables left the flag at 0 (instead of missing) whenever r_partyid4 itself was missing.
* Alters coded 9 (no party answer) are still left missing, as the 3-point guard did,
* because those alters have no 3-point party id.
forvalues i = 1/3{
	gen a`i'_same_partyid4 = (r_partyid4 == a`i'_partyid4) if ~missing(r_partyid4) & ~missing(a`i'_partyid4) & a`i'_partyid4 != 9
}

* party homophily: 1 when respondent and alter sit on the same point of the 7-point scale,
* 0 when they are six points apart; missing when either scale value is missing
forvalues i = 1/3{
	gen a`i'_party_homophily = (6-abs(a`i'_partyid7-r_partyid7))/6
}

egen p_party_homophily = rowmean(a?_party_homophily)

* share and number of alters with the same 4-category party id
* NOTE(review): rowtotal returns 0 when every flag is missing, so n_same_vote is 0 (not missing)
* for respondents without usable alters; n_relative is blanked in that case -- confirm intent
egen p_same_vote = rowmean(a?_same_partyid4)
egen n_same_vote = rowtotal(a?_same_partyid4)

* state-level: number of non-missing ids per state, plus a numeric encoding of the state string
bysort state: egen state_n = count(id)
encode state, generate(ppstaten)

generate dataset = "ANES"

* generate dyad data: one row per respondent-alter pair, saved separately;
* the respondent-level data in memory are restored afterwards
preserve
keep id a?_*

local dyad_stubs live_together closeness relative age male white black hispanic educ ///
	partyid3 partyid4 partyid_dk partyid7 same_partyid3 same_partyid4 party_homophily ///
	disagree vote_like vote_like2 interest religion same_church
local dyad_vars
foreach stub of local dyad_stubs {
	local dyad_vars `dyad_vars' a@_`stub'
}
reshape long `dyad_vars', i(id) j(order)

* rows with no live-together answer are dropped from the dyad file
drop if missing(a_live_together)

saveold "${dir_processed}/dyad_2008.dta", replace version(12)
restore

* respondent-level file
* ppstaten, region and wave9 are not matched by any pattern in this list, so they are
* not written to the respondent file
keep id r_* wtall sampcode svydate svydate2 year month day dataset ///
	i_* n_size state*  a* n_* p_*

saveold "${dir_processed}/anes_2008.dta", replace version(12)

